Preprocessing QC statistics ¶

Noam, July 2023¶

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each
# cell is executed, so edits to the project's helper modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
In [2]:
import os
# MOMAPS_HOME is the code checkout (it becomes the working directory and hosts PLOT_PATH);
# MOMAPS_DATA_HOME is the root under which the logs and the input images are looked up.
MOMAPS_HOME = '/home/labs/hornsteinlab/Collaboration/MOmaps_Noam/MOmaps'
MOMAPS_DATA_HOME = '/home/labs/hornsteinlab/Collaboration/MOmaps'
# Directory of preprocessing logs that log_files_qc() reads in the next cell.
LOGS_PATH = os.path.join(MOMAPS_DATA_HOME, "outputs/preprocessing/spd/logs/preprocessing_Dec2023")
# PLOT_PATH is handed to the validation / diff helpers further down.
PLOT_PATH = os.path.join(MOMAPS_HOME, 'src', 'preprocessing', 'notebooks','figures','Neurons')
# The working directory is switched before the `src.*` imports below — presumably so that the
# `src` package resolves from the checkout root; keep this line above those imports.
os.chdir(MOMAPS_HOME)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams["image.cmap"] = "Set1"
from src.common.lib.preprocessing_utils import rescale_intensity
# Star import: helpers called later in this notebook (e.g. log_files_qc, plot_hm) are not
# defined here, so they presumably come from this module or the other star imports below.
from src.common.lib.images_qc import *
import contextlib
import io
import matplotlib
import warnings
# Silence pandas' SettingWithCopyWarning for the whole notebook.
# NOTE(review): `pd.core.common.SettingWithCopyWarning` is a private import path; pandas >= 1.5
# exposes the class as `pd.errors.SettingWithCopyWarning` — confirm against the installed pandas
# before upgrading the environment.
warnings.filterwarnings('ignore', category=pd.core.common.SettingWithCopyWarning)
# Star imports: configuration names used below (panels, markers, reps, line_colors, ...) are not
# defined in this notebook, so they presumably originate from one of these modules.
from src.common.lib.qc_config_tmp import *
from src.common.lib.image_sampling_utils import *
from matplotlib.colors import LinearSegmentedColormap
from IPython.display import display, Javascript
In [3]:
# Read every preprocessing log under LOGS_PATH into a single table.
all_sites = log_files_qc(LOGS_PATH)
# Drop the 'SCNA' cell-line/condition rows, then split into DAPI rows and target-marker rows.
df = all_sites.loc[all_sites['cell_line_cond'] != 'SCNA']
df_dapi = df.loc[df['marker'] == 'DAPI']
df_target = df.loc[df['marker'] != 'DAPI']
reading logs of batch4_original_with_brenner
reading logs of batch3_original_with_brenner
reading logs of batch9_original_with_brenner
reading logs of batch6_original_with_brenner
reading logs of batch7_original_with_brenner
reading logs of batch5_original_with_brenner
reading logs of batch8_original_with_brenner

Total of 12 files were read.
Before dup handeling  (398635, 22)
After duplication removal #1: (389910, 23)
After duplication removal #2: (384581, 23)
In [4]:
# Batches covered by this report: batch3 through batch9 (inclusive).
batches = list(map('batch{}'.format, range(3, 10)))
batches
Out[4]:
['batch3', 'batch4', 'batch5', 'batch6', 'batch7', 'batch8', 'batch9']

Raw Files Validation¶

  1. How many site tiff files do we have in each folder?
  2. Are all existing files valid? (tif, at least 2049kB, not corrupted)
In [5]:
# Root folder of the raw SpinningDisk images.
root_directory_raw = os.path.join(
    MOMAPS_DATA_HOME, 'input', 'images', 'raw', 'SpinningDisk'
)

# Batch names with any "_16bit_no_downsample" suffix stripped, for lookup in the raw tree.
batches_raw = [name.replace("_16bit_no_downsample","") for name in batches]

# Second positional argument is False here and True for the processed tree
# (presumably a "processed" switch — confirm in images_qc).
raws = run_validate_folder_structure(
    root_directory_raw, False, panels, markers, PLOT_PATH, marker_info,
    cell_lines_to_cond, reps, cell_lines_for_disp, expected_dapi_raw,
    batches=batches_raw,
)
batch3
Folder structure is invalid. Missing paths:
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSHomozygous/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSHomozygous/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSHomozygous/panelC
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/TDP43/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/TDP43/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/TDP43/panelC
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/TBK1/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/TBK1/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/TBK1/panelC
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/WT/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/WT/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/WT/panelC
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSRevertant/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSRevertant/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSRevertant/panelC
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/OPTN/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/OPTN/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/OPTN/panelC
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSHeterozygous/panelA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSHeterozygous/panelB
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/batch3/FUSHeterozygous/panelC
Some files are bad:
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_output
CellProfiler_output
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
CellProfiler_DAPI-count
Total Sites:  38527
========
batch4
Folder structure is valid.
No bad files are found.
Total Sites:  57600
========
batch5
Folder structure is valid.
No bad files are found.
Total Sites:  57600
========
batch6
Folder structure is valid.
No bad files are found.
Total Sites:  57600
========
batch7
Folder structure is valid.
No bad files are found.
Total Sites:  57600
========
batch8
Folder structure is valid.
No bad files are found.
Total Sites:  57600
========
batch9
Folder structure is valid.
No bad files are found.
Total Sites:  57600
========
====================

Processed Files Validation¶

  1. How many site npy files do we have in each folder? -> How many sites survived the pre-processing?
  2. Are all existing files valid? (at least 100kB, npy not corrupted)
In [6]:
# Root folder of the processed (spd2) SpinningDisk images; also reused by the variance cell.
root_directory_proc = os.path.join(
    MOMAPS_DATA_HOME, 'input', 'images', 'processed', 'spd2', 'SpinningDisk'
)

# Same validation as for the raw tree, but with the second positional argument set to True.
procs = run_validate_folder_structure(
    root_directory_proc, True, panels, markers, PLOT_PATH, marker_info,
    cell_lines_to_cond, reps, cell_lines_for_disp, expected_dapi_raw,
    batches=batches,
)
batch3
Folder structure is invalid. Missing paths:
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHomozygous/Untreated/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TDP43/Untreated/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/TBK1/Untreated/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/Untreated/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/WT/stress/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSRevertant/Untreated/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/OPTN/Untreated/Phalloidin
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/G3BP1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/NONO
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/SQSTM1
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/KIF5A
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/TDP43
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/FMRP
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/PURA
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/CD41
/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/batch3/FUSHeterozygous/Untreated/Phalloidin
No bad files are found.
Total Sites:  31504
========
batch4
Folder structure is valid.
No bad files are found.
Total Sites:  44040
========
batch5
Folder structure is valid.
No bad files are found.
Total Sites:  47883
========
batch6
Folder structure is valid.
No bad files are found.
Total Sites:  55828
========
batch7
Folder structure is valid.
No bad files are found.
Total Sites:  56071
========
batch8
Folder structure is valid.
No bad files are found.
Total Sites:  55505
========
batch9
Folder structure is valid.
No bad files are found.
Total Sites:  55574
========
====================

Difference between Raw and Processed¶

In [7]:
# Compare the raw (`raws`) and processed (`procs`) validation results for every batch in `batches`.
# PLOT_PATH is passed through to the helper (presumably where the figures are written — confirm in images_qc).
display_diff(batches, raws, procs, PLOT_PATH)
batch3
========
batch4
========
batch5
========
batch6
========
batch7
========
batch8
========
batch9
========

Variance in each batch¶

In [8]:
# Per-batch variance computed on a sample of the processed images.
# Anything the helper writes to stdout is captured and discarded so only the summary line is shown.
for batch in batches:
    discarded_stdout = io.StringIO()
    with contextlib.redirect_stdout(discarded_stdout):
        var = sample_and_calc_variance(
            root_directory_proc,
            batch,
            sample_size_per_markers=200,
            num_markers=26,
        )
    print(f'{batch} var: ',var)
batch3 var:  0.015569925950291956
batch4 var:  0.014111859890173273
batch5 var:  0.014974081250157117
batch6 var:  0.016460123067249793
batch7 var:  0.016082496154033385
batch8 var:  0.016435503753641743
batch9 var:  0.016846129673614187

filtering qc¶

By order of filtering

1. % site survival after Brenner on DAPI channel¶

Percentage out of the total sites

In [9]:
# Filtering step 1: site survival after the Brenner filter on the DAPI channel.
# The returned value is passed on to the Cellpose survival step.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(df_dapi,batches, line_colors, panels)

2. % Site survival after Cellpose¶

Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.

A site will be filtered out if Cellpose found 0 cells in it.

In [10]:
# Filtering step 2: site survival after Cellpose, computed on top of the Brenner result.
# The returned value is passed on to the tiling survival step.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(df_dapi, batches, dapi_filter_by_brenner, line_colors, panels)

3. % Site survival by tiling¶

Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.

A site will be filtered out if, after tiling, no tile contains at least 85% of a cell that Cellpose detected.

In [11]:
# Filtering step 3: site survival after tiling, computed on top of the Cellpose result.
# The returned value is passed on to the target-channel Brenner step.
dapi_filter_by_tiling=show_site_survival_dapi_tiling(df_dapi, batches, dapi_filter_by_cellpose, line_colors, panels)

4. % Site survival after Brenner on target channel¶

Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses (where they differ from the percentages).

In [12]:
# Filtering step 4: site survival after the Brenner filter on the target channel,
# using both the DAPI and target rows plus the tiling result. The return value is not kept.
show_site_survival_target_brenner(df_dapi, df_target, dapi_filter_by_tiling)

Numbers!¶

  1. Total number of tiles: for each condition, we can know how many tiles we have --> number of data points for the model to train and infer on --> number of points in UMAPs.
  2. Total number of whole cells: for each condition, we can know how many whole cells we have.
In [13]:
# Display labels for the heatmaps, and the statistics to aggregate.
# Only the first two statistics have a label and get a heatmap; the third is aggregated for the tables only.
names = ['Total number of tiles', 'Total number of whole cells']
stats = ['n_valid_tiles','site_whole_cells_counts_sum','site_cell_count']
total_sum = calc_total_sums(df_target, df_dapi, stats)

# zip() stops at the shorter sequence (`names`), so exactly the first two statistics are plotted.
for name, stat in zip(names, stats):
    # The plotted statistic is renamed to 'index' before being handed to the heatmap helper.
    to_heatmap = total_sum.rename(columns={stat: 'index'})
    plot_filtering_heatmap(
        to_heatmap,
        extra_index='marker',
        vmin=None,
        vmax=None,
        xlabel=name,
        show_sum=True,
        figsize=(6,8),
    )
In [14]:
# Show the summary-statistics tables of `total_sum` (one per batch, then one for all batches, per the output below).
show_total_sum_tables(total_sum)
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch3
count 249.000000 249.000000 249.000000 249.000000
mean 540.409639 5.404096 583.136546 1527.702811
std 302.460196 3.024602 331.053130 874.566713
min 1.000000 0.010000 1.000000 4.000000
25% 357.000000 3.570000 381.000000 954.000000
50% 534.000000 5.340000 570.000000 1557.000000
75% 785.000000 7.850000 849.000000 2173.000000
max 1176.000000 11.760000 1306.000000 3471.000000
sum 134562.000000 NaN 145201.000000 380398.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch4
count 393.000000 393.000000 393.000000 393.000000
mean 514.633588 5.146336 558.643766 1292.000000
std 334.876650 3.348766 370.766182 874.038343
min 6.000000 0.060000 6.000000 9.000000
25% 196.000000 1.960000 203.000000 469.000000
50% 474.000000 4.740000 511.000000 1265.000000
75% 855.000000 8.550000 934.000000 2070.000000
max 1158.000000 11.580000 1294.000000 3027.000000
sum 202251.000000 NaN 219547.000000 507756.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch5
count 399.000000 399.000000 399.000000 399.000000
mean 572.561404 5.725614 624.045113 1452.187970
std 330.515747 3.305157 365.806156 873.976617
min 1.000000 0.010000 1.000000 4.000000
25% 251.000000 2.510000 270.000000 618.000000
50% 590.000000 5.900000 626.000000 1522.000000
75% 857.500000 8.575000 939.000000 2214.500000
max 1157.000000 11.570000 1281.000000 2988.000000
sum 228452.000000 NaN 248994.000000 579423.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch6
count 400.00000 400.000000 400.000000 400.000000
mean 710.73250 7.107325 757.992500 2006.835000
std 176.86102 1.768610 192.619517 517.149942
min 73.00000 0.730000 74.000000 214.000000
25% 589.00000 5.890000 625.000000 1645.750000
50% 717.50000 7.175000 765.000000 2056.500000
75% 834.00000 8.340000 874.000000 2363.000000
max 1095.00000 10.950000 1189.000000 2996.000000
sum 284293.00000 NaN 303197.000000 802734.000000
expected_count 450.00000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch7
count 400.000000 400.000000 400.0000 400.000000
mean 697.820000 6.978200 743.9700 1822.452500
std 172.560149 1.725601 188.6011 466.101587
min 365.000000 3.650000 390.0000 867.000000
25% 572.000000 5.720000 612.2500 1483.750000
50% 698.000000 6.980000 737.5000 1808.500000
75% 827.500000 8.275000 881.2500 2155.000000
max 1178.000000 11.780000 1261.0000 3164.000000
sum 279128.000000 NaN 297588.0000 728981.000000
expected_count 450.000000 450.000000 450.0000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch8
count 400.000000 400.000000 400.000000 400.000000
mean 646.270000 6.462700 685.905000 1736.702500
std 174.682834 1.746828 188.968221 498.600366
min 73.000000 0.730000 79.000000 176.000000
25% 532.500000 5.325000 559.000000 1419.500000
50% 656.000000 6.560000 694.500000 1753.000000
75% 763.000000 7.630000 812.000000 2093.250000
max 1086.000000 10.860000 1189.000000 2996.000000
sum 258508.000000 NaN 274362.000000 694681.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch9
count 398.000000 398.000000 398.000000 398.000000
mean 725.766332 7.257663 775.123116 2009.376884
std 197.438198 1.974382 215.173850 545.844936
min 92.000000 0.920000 97.000000 224.000000
25% 577.000000 5.770000 611.250000 1641.250000
50% 742.500000 7.425000 790.000000 2086.500000
75% 879.500000 8.795000 939.500000 2411.500000
max 1205.000000 12.050000 1323.000000 3215.000000
sum 288855.000000 NaN 308499.000000 799732.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n valid tiles % valid tiles site_whole_cells_counts_sum site_cell_count
All batches
count 2.639000e+03 2639.000000 2.639000e+03 2.639000e+03
mean 6.351076e+02 6.351076 6.810868e+02 1.702806e+03
std 2.603559e+02 2.603559 2.841570e+02 7.242192e+02
min 1.000000e+00 0.010000 1.000000e+00 4.000000e+00
25% 4.860000e+02 4.860000 5.140000e+02 1.289000e+03
50% 6.640000e+02 6.640000 7.080000e+02 1.775000e+03
75% 8.220000e+02 8.220000 8.815000e+02 2.231000e+03
max 1.205000e+03 12.050000 1.323000e+03 3.471000e+03
sum 1.676049e+06 NaN 1.797388e+06 4.493705e+06
expected_count 4.500000e+02 450.000000 4.500000e+02 4.500000e+02

Number of Cells in Site for each batch and cell line¶

In [15]:
# Keep only DAPI rows whose site has a non-zero valid-tile count.
df_no_empty_sites = df_dapi.loc[df_dapi['n_valid_tiles'] != 0]

# (column to plot, figure title) — one plot per entry, drawn in this order.
cell_count_plots = [
    ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
    ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
    ('site_cell_count', 'Cellpose Cell Count Average per Site'),
]
for y_column, plot_title in cell_count_plots:
    plot_cell_count(df_no_empty_sites, lines_order, custom_palette,
                    y=y_column, title=plot_title)

number of valid tiles per image (site)¶

In [16]:
# Valid-tile counts per site, taken from the DAPI rows.
# batch_min / batch_max (3 and 9) mirror the range used to build `batches`; keep them in sync if the batch selection changes.
plot_catplot(df_dapi, custom_palette,reps, x='n_valid_tiles', x_title='valid tiles count', batch_min=3, batch_max=9)

Heatmap QC per batch, panel and cell line (tiles that passed QC condition) ¶

In [17]:
# QC heatmaps from the DAPI rows: cell lines as rows, panels as columns, split by replicate (`split_by='rep'`).
plot_hm(df_dapi, split_by='rep', rows='cell_line', columns='panel')
In [18]:
# For supp figure 1: same row/column layout as the heatmaps above (cell line x panel),
# restricted to batch7 and batch8 and to rep1 and rep2, which the helper combines.
plot_hm_combine_batches(df_dapi,  batches=['batch7','batch8'], 
                        reps = ['rep1','rep2'],rows='cell_line', columns='panel')

Assessing Staining Reproducibility and Outliers¶

In [ ]:
# Histograms for each batch, with the batch name printed before and a separator line after.
separator = "=" * 30
for batch in batches:
    print(batch)
    run_calc_hist_new(
        batch,
        cell_lines_for_disp,
        markers,
        hist_sample=10,
        sample_size_per_markers=200,
        ncols=7,
        nrows=4,
    )
    print(separator)
batch3
/home/labs/hornsteinlab/Collaboration/MOmaps_Noam/MOmaps/src/common/lib/images_qc.py:914: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()
/home/labs/hornsteinlab/Collaboration/MOmaps/anaconda3/momaps_37/lib/python3.7/site-packages/IPython/core/pylabtools.py:151: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  fig.canvas.print_figure(bytes_io, **kw)
==============================
batch4
==============================
batch5
==============================
batch6
==============================
batch7
==============================
batch8
==============================
batch9
In [ ]:
# Save the notebook, then export it to HTML (the HTML is written to the same folder as the notebook).
# The notebook path is relative, so this relies on the working directory set by os.chdir(MOMAPS_HOME).
# NOTE(review): the save is issued as front-end Javascript and `IPython.notebook` is a classic-Notebook
# object, so the checkpoint may not have completed (or may not run at all in other front ends) when
# nbconvert starts — confirm the export reflects the latest outputs.
# NOTE(review): os.system's exit status is not checked; a failed conversion only shows as a non-zero cell output.
display(Javascript('IPython.notebook.save_checkpoint();'))
os.system('jupyter nbconvert --to html src/preprocessing/notebooks/no_ds/qc_report_newPP.ipynb')
In [ ]:
# Save the notebook, then export it to PDF (the PDF is written to the same folder as the notebook).
# The notebook path is relative, so this relies on the working directory set by os.chdir(MOMAPS_HOME).
# NOTE(review): os.system's exit status is not checked; a failed conversion only shows as a non-zero cell output.
display(Javascript('IPython.notebook.save_checkpoint();'))
os.system('jupyter nbconvert --to pdf src/preprocessing/notebooks/no_ds/qc_report_newPP.ipynb')
In [ ]:
import pandoc
In [ ]: